import requests
import time  # 导入 time 模块用于实现睡眠功能
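# pip install lxml
# Two Python installations exist on this machine; to install lxml for the
# Python 3 interpreter specifically, invoke pip through that interpreter:
# D:/Python/Python311/python.exe -m pip install lxml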
from lxml import etree

# Download a book chapter by chapter and append each chapter's title and text
# to a local UTF-8 text file, following every page's "next" link until the
# link is missing or points back at the chapter list.

# Root of the site; the chapter paths used below are site-relative.
base_url = "https://www.douluodalu.cc"
# Path of the first chapter to download; replaced by each "next" link in turn.
url = "/book/douluodalu1/1.html"
# Path of the book's chapter list. A "next" link equal to this path is treated
# as the end of the book.
index_url = "/book/douluodalu1/"

# Request headers are loop-invariant, so build them once.
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36"
}

has_next = True
while has_next:
    # The timeout keeps a stalled connection from hanging the script, and
    # raise_for_status() ensures an HTTP error page is never parsed as a
    # chapter.
    response = requests.get(base_url + url, headers=headers, timeout=10)
    response.raise_for_status()
    response.encoding = "utf-8"

    tree = etree.HTML(response.text)

    # Chapter title: text of the first <h1>. Without one the page is not a
    # chapter page, so stop rather than fail with a bare IndexError.
    title_list = tree.xpath("//h1/text()")
    if not title_list:
        print(f'未找到章节标题，停止拉取：{url}')
        break
    title = title_list[0]

    # Chapter body: text of the <p> children of the "m-post" div, one
    # paragraph per line.
    info_list = tree.xpath('//div[@class="m-post"]/p/text()')
    info_text = "\n".join(info_list)

    # Append and close per chapter so every finished chapter is on disk
    # before the next request is made.
    with open("斗罗大陆.txt", "a", encoding="utf-8") as f:
        f.write(title + "\n\n" + info_text + "\n\n")

    print(f'拉取完成章节：{title}')

    # Link to the next page: href of the anchor in the second cell of a
    # table row.
    next_url_list = tree.xpath('//tr/td[2]/a/@href')
    if next_url_list:
        url = next_url_list[0]
        print(f'下一页链接为：{url}')
        # A link back to the chapter list means there is nothing more to fetch.
        has_next = url != index_url
    else:
        # No link at all: stop, otherwise the same page would be fetched and
        # appended again forever.
        has_next = False

    if has_next:
        # Pause 0.3 seconds between requests; skipped when no request follows.
        time.sleep(0.3)

    

    